In [37]:
import os
import sys

In [38]:
from importlib import reload

In [2]:
import pandas

In [3]:
sys.path.append(os.path.join(os.path.abspath('../..'), 'src'))

In [4]:
import utils

In [7]:
import init_NCCampaignDocument_table

In [53]:
reload(init_NCCampaignDocument_table)


Out[53]:
<module 'init_NCCampaignDocument_table' from '/home/immersinn/gits/ncga/src/init_NCCampaignDocument_table.py'>

In [88]:
enum_options = '24-Hour Electioneering Communications','48-Hour','Annual','Audit Letter','Campaign Reporter','Candidate Designation of Committee Funds','Candidate Specific Communications','Certification of Inactive Status','Certification of Incorporated Political Committee','Certification of Return to Active Status','Certification of Threshold','Certification of Treasurer','Certification to Close Committee','Contribution from a Business Account Statement','Correspondence Returned Undeliverable or Unclaimed','Declaration of Intent','Deferred Notice','District Attorney Letter','Electioneering Communications Report','Federal Mid Year','Federal Year End','Final','First Quarter','Forgiven Loan Statement','Fourth Quarter','Independent Expenditure Political Committee Certification','Independent Expenditure Report','Independent Expenditure for Registered Committees','Independent Expenditure for non-Committees','Interim','Judicial Qualifying Contributions Report','Loan Proceeds Statement','Mid Year Semi-Annual','Miscellaneous Correspondence','Monthly','Municipal Voter-Owned Election Qualifying Contributions','Non-Compliance Letter','Non-Participating Candidate','Notice of Candidacy','Notice of Termination of Active Status','Notification of Change to Reporting Schedule','Organizational','Other','Paid Penalty Assessment or Forfeiture','Penalty Appeal','Penalty Appeal Decision','Penalty Assessment','Penalty Assessment Letter','Penalty Resolution Agreement Executed','Penalty Resolution Agreement Proposal','Penalty Waiver Letter','Political Party Executive Committee Exempt Sales Plan','Post General','Post Primary','Pre-Election','Pre-Primary','Pre-Referendum','Pre-Runoff','Rescind Letter','Second Quarter','Signed Penalty Waiver Agreement','Special','Statement of Organization','Supplemental Final','Ten-day','Third Quarter','Thirty-day','Thirty-five-day','Twelve-day','Voter-Owned Election Qualifying Contributions','Weekly','Year End Semi-Annual'
enum_options = set(enum_options)

In [89]:
list(enum_options)[:3]


Out[89]:
['Post Primary', '48-Hour', 'Independent Expenditure Report']

Load 2014


In [70]:
df = init_NCCampaignDocument_table.load_all()

In [86]:
df.shape


Out[86]:
(35050, 9)

In [87]:
df.head()


Out[87]:
committee report_year report_type amend rec_date start_date end_date image_link data_link
0 01ST CONG DIST DEC 2014 Fourth Quarter 0 2015-01-06 2014-10-19 2014-12-31 http://cf.ncsbe.gov/CFOrgLkup/cf_report_image.... http://cf.ncsbe.gov/CFOrgLkup/cf_report_sectio...
1 01ST CONG DIST DEC 2014 Third Quarter 1 2015-01-06 2014-07-01 2014-10-18 http://cf.ncsbe.gov/CFOrgLkup/cf_report_image.... http://cf.ncsbe.gov/CFOrgLkup/cf_report_sectio...
2 01ST CONG DIST DEC 2014 Paid Penalty Assessment or Forfeiture None 2014-11-07 None None http://cf.ncsbe.gov/CFOrgLkup/cf_report_image....
3 01ST CONG DIST DEC 2014 Penalty Assessment Letter None 2014-10-30 None None http://cf.ncsbe.gov/CFOrgLkup/cf_report_image....
4 01ST CONG DIST DEC 2014 Third Quarter 0 2014-10-27 2014-07-01 2014-10-18 http://cf.ncsbe.gov/CFOrgLkup/cf_report_image....

In [90]:
def test_member(rt):
    if not rt:
        return(0)
    else:
        if rt in enum_options:
            return(0)
        else:
            return(1)

nienum = df.report_type.apply(test_member)

In [93]:
import numpy

In [100]:
for ind in numpy.where(nienum)[0]:
    print(df.report_type[ind])


Penalty Assessment - 30 Days Aged

Try Inserts


In [65]:
keep = []
count = 0
for tup in df2014.itertuples():
    keep.append(list(tup)[1:])
    count += 1
    if count > 5000:
        break

In [66]:
keep[0]


Out[66]:
['01ST CONG DIST DEC',
 '2014',
 'Fourth Quarter',
 '0',
 '2015-01-06',
 '2014-10-19',
 '2014-12-31',
 'http://cf.ncsbe.gov/CFOrgLkup/cf_report_image.aspx?DID=182000',
 'http://cf.ncsbe.gov/CFOrgLkup/cf_report_sections.aspx?RID=132598&SID=STA-C3826N-C-001&CN=01ST%20CONG%20DIST%20DEC&RN=2014%20Fourth%20Quarter']

In [67]:
keep = [{col:entry for col,entry in zip(df2014.columns, col)} for col in keep]
keep[0]


Out[67]:
{'amend': '0',
 'committee': '01ST CONG DIST DEC',
 'data_link': 'http://cf.ncsbe.gov/CFOrgLkup/cf_report_sections.aspx?RID=132598&SID=STA-C3826N-C-001&CN=01ST%20CONG%20DIST%20DEC&RN=2014%20Fourth%20Quarter',
 'end_date': '2014-12-31',
 'image_link': 'http://cf.ncsbe.gov/CFOrgLkup/cf_report_image.aspx?DID=182000',
 'rec_date': '2015-01-06',
 'report_type': 'Fourth Quarter',
 'report_year': '2014',
 'start_date': '2014-10-19'}

In [14]:
import mysql_utils

In [182]:
reload(mysql_utils)


Out[182]:
<module 'mysql_utils' from '/home/immersinn/gits/ncga/src/mysql_utils.py'>

In [21]:
add_entry = ("INSERT INTO " + 'camp_doc_lu' + " "
                 "(committee, report_year, report_type, amend, rec_date, start_date, end_date, image_link, data_link) "
                 "VALUES (%(committee)s, %(report_year)s, %(report_type)s, "
                 "%(amend)s, %(rec_date)s, %(start_date)s, %(end_date)s, "
                 "%(image_link)s, %(data_link)s)")

In [22]:
add_entry


Out[22]:
'INSERT INTO camp_doc_lu (committee, report_year, report_type, amend, rec_date, start_date, end_date, image_link, data_link) VALUES (%(committee)s, %(report_year)s, %(report_type)s, %(amend)s, %(rec_date)s, %(start_date)s, %(end_date)s, %(image_link)s, %(data_link)s)'

In [68]:
with mysql_utils.curInsert() as cursor:
    for i,entry in enumerate(keep):
        cursor.execute(add_entry, entry)

In [69]:
i


Out[69]:
5000

In [52]:
keep[13]


Out[52]:
{'amend': '0',
 'committee': '01ST CONG DIST REC',
 'data_link': '',
 'end_date': None,
 'image_link': 'http://cf.ncsbe.gov/CFOrgLkup/cf_report_image.aspx?DID=172920',
 'rec_date': '2014-04-29',
 'report_type': '',
 'report_year': '2014',
 'start_date': None}

In [ ]: